{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 首先 import 必要的模块\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "#竞赛的评价指标为logloss，但LinearSVC不支持概率\n",
    "#所以在这个例子中我们用正确率accuracy_score作为模型选择的度量\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>feat_1</th>\n",
       "      <th>feat_2</th>\n",
       "      <th>feat_3</th>\n",
       "      <th>feat_4</th>\n",
       "      <th>feat_5</th>\n",
       "      <th>feat_6</th>\n",
       "      <th>feat_7</th>\n",
       "      <th>feat_8</th>\n",
       "      <th>feat_9</th>\n",
       "      <th>...</th>\n",
       "      <th>feat_84_tfidf</th>\n",
       "      <th>feat_85_tfidf</th>\n",
       "      <th>feat_86_tfidf</th>\n",
       "      <th>feat_87_tfidf</th>\n",
       "      <th>feat_88_tfidf</th>\n",
       "      <th>feat_89_tfidf</th>\n",
       "      <th>feat_90_tfidf</th>\n",
       "      <th>feat_91_tfidf</th>\n",
       "      <th>feat_92_tfidf</th>\n",
       "      <th>feat_93_tfidf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.075886</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.013158</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.013158</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.014286</td>\n",
       "      <td>0.315789</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.131579</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.460983</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.008244</td>\n",
       "      <td>0.022456</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.124622</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.145988</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 188 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   id    feat_1  feat_2  feat_3    feat_4    feat_5  feat_6    feat_7  \\\n",
       "0   1  0.016393     0.0     0.0  0.000000  0.000000     0.0  0.000000   \n",
       "1   2  0.000000     0.0     0.0  0.000000  0.000000     0.0  0.000000   \n",
       "2   3  0.000000     0.0     0.0  0.000000  0.000000     0.0  0.000000   \n",
       "3   4  0.016393     0.0     0.0  0.014286  0.315789     0.1  0.131579   \n",
       "4   5  0.000000     0.0     0.0  0.000000  0.000000     0.0  0.000000   \n",
       "\n",
       "     feat_8  feat_9      ...        feat_84_tfidf  feat_85_tfidf  \\\n",
       "0  0.000000     0.0      ...             0.000000       0.075886   \n",
       "1  0.013158     0.0      ...             0.000000       0.000000   \n",
       "2  0.013158     0.0      ...             0.000000       0.000000   \n",
       "3  0.000000     0.0      ...             0.460983       0.000000   \n",
       "4  0.000000     0.0      ...             0.000000       0.124622   \n",
       "\n",
       "   feat_86_tfidf  feat_87_tfidf  feat_88_tfidf  feat_89_tfidf  feat_90_tfidf  \\\n",
       "0       0.000000       0.000000            0.0            0.0       0.000000   \n",
       "1       0.000000       0.000000            0.0            0.0       0.000000   \n",
       "2       0.000000       0.000000            0.0            0.0       0.000000   \n",
       "3       0.008244       0.022456            0.0            0.0       0.000000   \n",
       "4       0.000000       0.000000            0.0            0.0       0.145988   \n",
       "\n",
       "   feat_91_tfidf  feat_92_tfidf  feat_93_tfidf  \n",
       "0            0.0            0.0            0.0  \n",
       "1            0.0            0.0            0.0  \n",
       "2            0.0            0.0            0.0  \n",
       "3            0.0            0.0            0.0  \n",
       "4            0.0            0.0            0.0  \n",
       "\n",
       "[5 rows x 188 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Directory holding the engineered Otto feature files; edit this single value to relocate the data.\n",
    "dpath = \"E:\\\\Jupyter\\\\SVM\\\\\"\n",
    "\n",
    "# Two feature files for the training set (the second one carries the *_tfidf columns).\n",
    "train1 = pd.read_csv(dpath + \"Otto_FE_train_org.csv\")\n",
    "train2 = pd.read_csv(dpath + \"Otto_FE_train_tfidf.csv\")\n",
    "\n",
    "# The column-wise concat below aligns rows by index, so a row-count mismatch would\n",
    "# silently introduce NaN rows; fail loudly instead.\n",
    "# NOTE(review): this assumes both files list the same samples in the same order - confirm.\n",
    "assert len(train1) == len(train2), \"feature files must contain the same number of rows\"\n",
    "\n",
    "# Drop the duplicated id/target columns from the second file before joining.\n",
    "train2 = train2.drop([\"id\", \"target\"], axis=1)\n",
    "train = pd.concat([train1, train2], axis=1, ignore_index=False)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 61878 entries, 0 to 61877\n",
      "Columns: 188 entries, id to feat_93_tfidf\n",
      "dtypes: float64(186), int64(1), object(1)\n",
      "memory usage: 88.8+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels are kept as the original strings (of the form Class_x); no numeric encoding is applied here.\n",
    "y_train = train[\"target\"]\n",
    "\n",
    "# Everything except the row id and the label is a model input.\n",
    "X_train = train.drop(columns=[\"id\", \"target\"])\n",
    "\n",
    "# Keep the feature names for later use (e.g. visualisation).\n",
    "feat_names = X_train.columns\n",
    "\n",
    "# Most scikit-learn estimators accept sparse input, which makes training much faster.\n",
    "from scipy.sparse import csr_matrix\n",
    "X_train = csr_matrix(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Fit on a 1000-row subset and keep the remaining rows for validation\n",
    "# (presumably to keep SVM training fast - TODO confirm the intended subset size).\n",
    "# The fixed random_state makes the split repeatable.\n",
    "X_train_part, X_val, y_train_part, y_val = train_test_split(\n",
    "    X_train,\n",
    "    y_train,\n",
    "    train_size=1000,\n",
    "    random_state=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1000, 186)\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: (rows, features) of the training subset.\n",
    "print(X_train_part.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import LinearSVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
       "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
       "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
       "     verbose=0)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1. Baseline model with default hyper-parameters.\n",
    "# LinearSVC's solver draws random numbers when dual=True (the default), so pin\n",
    "# random_state to make the baseline score repeatable across kernel restarts.\n",
    "SVC1 = LinearSVC(random_state=0)\n",
    "\n",
    "# 2. Train on the training subset; the fitted estimator is the cell's displayed value.\n",
    "SVC1.fit(X_train_part, y_train_part)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy is:  0.7310194158809422\n",
      "Classification report for classifier LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
      "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
      "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
      "     verbose=0):\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     Class_1       0.44      0.25      0.32      1899\n",
      "     Class_2       0.66      0.80      0.72     15850\n",
      "     Class_3       0.45      0.38      0.41      7866\n",
      "     Class_4       0.52      0.11      0.19      2643\n",
      "     Class_5       0.90      0.94      0.92      2707\n",
      "     Class_6       0.92      0.91      0.91     13906\n",
      "     Class_7       0.63      0.52      0.57      2790\n",
      "     Class_8       0.80      0.90      0.85      8332\n",
      "     Class_9       0.78      0.80      0.79      4885\n",
      "\n",
      "   micro avg       0.73      0.73      0.73     60878\n",
      "   macro avg       0.68      0.62      0.63     60878\n",
      "weighted avg       0.72      0.73      0.71     60878\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[  467   125    19     4     8   208    52   449   567]\n",
      " [   17 12601  2549   150   157    78   117   138    43]\n",
      " [   12  4321  3010    88    47    22   260    81    25]\n",
      " [    6  1383   663   298    26   123   106    34     4]\n",
      " [    0    98    12     2  2532     4     0    49    10]\n",
      " [   86   154    45    13     5 12701   203   440   259]\n",
      " [  102   310   308    10    18   194  1464   351    33]\n",
      " [  144    82    85     1     5   254    73  7532   156]\n",
      " [  220   122    19     5     7   275    45   294  3898]]\n"
     ]
    }
   ],
   "source": [
    "# Score the baseline model on the held-out rows.\n",
    "y_predict = SVC1.predict(X_val)\n",
    "\n",
    "val_accuracy = accuracy_score(y_val, y_predict)\n",
    "print(\"accuracy is: \", val_accuracy)\n",
    "\n",
    "# Per-class precision / recall / F1, followed by the raw confusion counts.\n",
    "val_report = classification_report(y_val, y_predict)\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\" % (SVC1, val_report))\n",
    "\n",
    "val_confusion = confusion_matrix(y_val, y_predict)\n",
    "print(\"Confusion matrix:\\n%s\" % val_confusion)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 进行超参数调优\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_grid_point_Linear(C, X_train, y_train, X_val, y_val, random_state=0):\n",
    "    \"\"\"Train a LinearSVC with regularisation parameter C and score it on a validation set.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    C : float\n",
    "        Value passed to LinearSVC as its C parameter.\n",
    "    X_train, y_train :\n",
    "        Features and labels used to fit the model.\n",
    "    X_val, y_val :\n",
    "        Held-out features and labels used to score the fitted model.\n",
    "    random_state : int or None, default 0\n",
    "        Seed forwarded to LinearSVC so that differences between grid points come\n",
    "        from C rather than from solver randomness. Pass None to leave the solver unseeded.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Accuracy returned by the estimator's score() on (X_val, y_val).\n",
    "        The value is also printed together with C.\n",
    "    \"\"\"\n",
    "    # Fit on the training data.\n",
    "    SVC2 = LinearSVC(C=C, random_state=random_state)\n",
    "    SVC2.fit(X_train, y_train)\n",
    "\n",
    "    # Accuracy on the validation data.\n",
    "    accuracy = SVC2.score(X_val, y_val)\n",
    "\n",
    "    print(\"C= {} : accuracy= {} \".format(C, accuracy))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 0.1 : accuracy= 0.7260258221360755 \n",
      "C= 1.0 : accuracy= 0.7310194158809422 \n",
      "C= 10.0 : accuracy= 0.7091560169519366 \n",
      "C= 100.0 : accuracy= 0.6776011038470383 \n",
      "C= 1000.0 : accuracy= 0.6327573179145176 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No handles with labels found to put in legend.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmYFNW5x/Hvy64BZRuVCygQBwU30BGNKCoGRYMsBhGUoAYxajBqFNG4o1HjrleSuMUtKioK4hZEcYlGhHEXkEVwmUgiEjCooCzv/ePUXJthZroZprp6un+f5+nH7urT3b8uZ+al6pw6x9wdERGR6tRLOoCIiOQ+FQsREUlLxUJERNJSsRARkbRULEREJC0VCxERSUvFQkRE0lKxEBGRtFQsREQkrQZJB6gtrVu39g4dOiQdQ0SkTnnzzTe/dPeidO3yplh06NCB0tLSpGOIiNQpZvZJJu10GkpERNJSsRARkbRULEREJK286bMQESl0a9asoaysjNWrV2/0XJMmTWjXrh0NGzas0XurWIiI5ImysjKaNWtGhw4dMLP/3+7uLFu2jLKyMjp27Fij99ZpKBGRPLF69WpatWq1QaEAMDNatWpV6RFHplQsRETySMVCkW57pnQaStJyh6VLYdEiWLwYPvsMDjkE9tor6WQiki0qFgLAqlXw8cehIJTfFi/+4f4332zY3gxOOAGuvBK22y6JxCKSTSoWBWL9eliyZOMiUH5bsmTD9ltuCZ06hdshh0DHjj88btUKbrgBbrwRJk6ECy+EM86Axo2T+W4i8gN3r/SUk7tv1vva5r5BrigpKfFCn+5j5cqNC0H548WL4bvvfmhrBu3bb1gEym8dO8I224Q21VmwAM45B6ZMgR//OBSQI49M/zoRicfixYtp1qzZRp3c5aOhVq5cudFoKDN7091L0r23ikUdsnYtlJVVfXTw5Zcbtt9qq/BHPLUIlN/ffvvaOxJ47jk480yYOxf69AlHHLvsUjvvLSKZq8l1FioWddTy5RsXgfLC8MknoWCUa9AAdtih6qODFi2y96/8NWvgz3+Giy8ORzinnQaXXgotW2bn80WkZlQsctT334c/+lUdHXz11YbtW7eu/MigUydo1y4UjFzy5ZdwySWhcDRvDpdfDiefnHs5RSRQsUhI6jDTyo4OyspCZ3O5xo2hQ4fKjww6dgynkuqi998Pnd4vvgi77go33RQ6ykUkt2RaLPTvvRpYtWrDjuOKheHbbzds36ZNKAAHHrjx0UGbNlAvDy+N3G03eOEFmDwZzj4bfvpTGDQIrrsufG8RqVt0ZFGJ1GGmlR0dVDfMtOIpow4dwvOFbPXq0On9+9+Hvo2zz4bzz4dmzZJOJiI6DZWhr76Cu+7asCh8/HHlw0wr6zfo1AmKijRcNBOffx6KxH33hSOqq6+G4cPz88hKpK5QscjQihVh1NDWW1d9dFCbw0wF3ngDfvMbmDkTevSAm2+GffdNOpVIYVKfRYaaN4f//CcUDMmOffaB11+HBx6AsWPhJz+BX/wCrroK2rZNOp2IVEYnAFChSEK9eqFAzJ8Pv/sdPPII7LRTmGtqM2ZRFpGYqFhIopo2DR3fc+bAYYfBBRdAly7w2GNhGLKI5AYVC8kJnTqFAvHCC2GU1ODB0Ls3vPde0slEBFQsJMf07g1vvQV//GO4sK97dzj11I3nvRKR7FKxkJzToEEoEPPnw+jRcMcdUFwcRk2tWZN0OpHCpGIhOatly1Ag3nsP9t47zGy7xx4wdWrSyUQKj4qF5LyuXUOBmDIlHFn07RvWzViwIOlkIoUj1mJhZn3NbJ6ZLTSz8yp5/kYzeye6zTezFdH2bmb2upnNNrP3zOyYOHNK7jMLBeKDD+Caa+Dll8OaGWPGbDxTr4jUvtiKhZnVB8YDhwNdgWFm1jW1jbuf5e7d3L0b8L/A49FT3wIj3H0XoC9wk5k1
jyur1B2NG4cCMX9+uE7j+uuhc+cwZcu6dUmnE8lfcR5Z9AAWuvsid/8emAAMqKb9MOAhAHef7+4LovufA18ARTFmlTpmu+1CgZg5E3bcEU46KUwd8uqrSScTyU9xFou2wGcpj8uibRsxsx2AjsD0Sp7rATQCPqrkuZPNrNTMSpcuXVoroaVuKSkJBeLBB+GLL+CAA2DYMPj006STieSXOItFZfOwVnVN7lBgortvcCLBzNoA9wMnuvv6ii9y99vdvcTdS4qKdOBRqMxCgfjww7BK3+TJsPPOcNllG68tIiI1E2exKAPapzxuB3xeRduhRKegypnZVsDTwIXuPiOWhJJXfvSjsO73hx+GzvBLLw1F4+GHNXWIyOaKs1jMAorNrKOZNSIUhCkVG5nZTkAL4PWUbY2AScB97v5ojBklD+2wQygQL78MrVrB0KHQq1e4MlxEaia2YuHua4HRwFRgLvCIu882s3Fm1j+l6TBggm+4sMYQoBdwQsrQ2m5xZZX81KsXlJbC7bfDvHmhf2PUqNC3ISKbpuAXP5LCsGIFXH453HJLWOb24ovh9NOhUaOkk4kkK9PFj3QFtxSE5s3DNRkffBBGTJ1zDuy6Kzz9tPozRDKhYiEFZaed4Kmn4JlnwgJM/frBEUfA3LlJJxPJbSoWUpAOPzxMgX7jjWGJ1913h7POguXLk04mkptULKRgNWwYZrJdsABGjgwz3HbuDLfdpqlDRCpSsZCCV1QEf/5zGFrbtSuccgrsuSe89FLSyURyh4qFSKRbt1AgHn00zGR78MFw9NHw8cdJJxNJnoqFSAqzsP733LlhqO0zz4SrwC+6CL75Jul0IslRsRCpxBZbwIUXhov5Bg+GK64II6keeEBDbaUwqViIVKNdO/jrX+G116BNGxg+HHr2hFmzkk4mkl0qFiIZ2G8/eOMNuPtuWLQorJ1x4omwZEnSyUSyQ8VCJEP16sEJJ4RV+saODWtodO4MV18Nq1cnnU4kXioWIptoq61CgZg9Gw45BM4/P6wHPnmy+jMkf6lYiNTQjjuGAvHcc9CkCQwaBH36hPmnRPKNioXIZurTB959F/73f8OFfXvsAaNHw7JlSScTqT0qFiK1oEGDUCAWLIBTT4U//QmKi+HWW2Ht2qTTiWw+FQuRWtSqVSgQ77wD3buHNTO6dYN//CPpZCKbR8VCJAa77QbPPw+TJoUrvw86KAy7FamrVCxEYmIGAweGfoxeveCXv4QxYzSjrdRNKhYiMWvRAp59Fk47Da67LhSQ//436VQim0bFQiQLGjaE8eNDf8azz4YpQzSbrdQlKhYiWfTrX4diUVYGe+8Nr76adCKRzKhYiGRZnz4wY0Y4PdW7N9xzT9KJRNJTsRBJwE47hYLRq1eYkPDcc9XxLblNxUIkIS1bhlNSp54K114bpgtZuTLpVCKVU7EQSVDDhvDHP4apQp5+Wh3fkrtULERywOjR4Sjj00/DWhmvvZZ0IpENqViI5IhDDw39GFtvHTq+77036UQiP1CxEMkhO+8cVuTr2TMstDR2rDq+JTeoWIjkmJYtYepU+NWv4Jpr4Kij4Ouvk04lhU7FQiQHNWwYpjm/5RZ46qlwpPHJJ0mnkkKmYiGSo8zCFOfPPBMKRY8e8PrrSaeSQhVrsTCzvmY2z8wWmtl5lTx/o5m9E93mm9mKlOeON7MF0e34OHOK5LLDDgsd382ahanO778/6URSiBrE9cZmVh8YD/QByoBZZjbF3eeUt3H3s1Lanw50j+63BC4BSgAH3oxeuzyuvCK5rLzje/BgGDEC5syB3/8e6uncgGRJnD9qPYCF7r7I3b8HJgADqmk/DHgoun8YMM3d/xMViGlA3xiziuS8Vq1Cx/eoUXD11er4luyKs1i0BT5LeVwWbduIme0AdASmb+prRQpJo0Zw221w003w5JOw//7hQj6RuMVZLKySbV5F26HARHcvH1Ge0WvN7GQzKzWz0qVLl9YwpkjdYgZnnBGm
B1m8OEx1ro5viVucxaIMaJ/yuB3weRVth/LDKaiMX+vut7t7ibuXFBUVbWZckbqlb9/Q8d20KRx8MDzwQNKJJJ/FWSxmAcVm1tHMGhEKwpSKjcxsJ6AFkPpvo6nAoWbWwsxaAIdG20QkRZcuMHMm7LsvDB8OF1wA69cnnUryUWzFwt3XAqMJf+TnAo+4+2wzG2dm/VOaDgMmuLunvPY/wOWEgjMLGBdtE5EKWrWC554LHd9XXhlGTKnjW2qbpfyNrtNKSkq8tLQ06RgiiXGHm2+Gs8+G3XeHKVOgffv0r5PCZmZvuntJunYapS2SJ8zgzDPD9CAffRQ6vmfMSDqV5AsVC5E8c/jhYXTUlluGK74ffDDpRJIPVCxE8tAuu4SO7332geOOgwsvVMe3bB4VC5E81bo1TJsGI0eGqUGOPhq++SbpVFJXqViI5LFGjeCOO+CGG2DyZDjgAPjss/SvE6lIxUIkz5nBWWeF6UEWLgxTnb/xRtKppK5RsRApEEccETq+t9gCDjwQHnoo/WtEyqlYiBSQ8o7vHj3g2GPhoovU8S2ZUbEQKTCtW8Pzz8OJJ8IVV8CQIer4lvRULEQKUKNGcNddcN118Pjj0KsXlJUlnUpymYqFSIEyC1ODTJkC8+eHU1MzZyadSnJVRsXCzB4zs5+ZmYqLSJ7p1y90fDduHDq+J0xIOpHkokz/+P8JOBZYYGZXm9nOMWYSkSzbdddwVFFSAsOGwSWXqONbNpRRsXD35939OGBP4GNgmpn9w8xONLOGcQYUkewoKvqh43vcODjmGPj226RTSa7I+LSSmbUCTgBOAt4GbiYUj2mxJBORrGvcOHR8X3stPPZY6Pj+5z+TTiW5INM+i8eBvwNbAke6e393f9jdTweaxhlQRLLLDM45J3R8z5sXpjrXUjGS6ZHFre7e1d2vcvclqU9ksmiGiNQ9/frBP/4RhtkecAA88kjSiSRJmRaLLmbWvPxBtDb2aTFlEpEcsdtuoeN7r71CH8all4YV+aTwZFosRrn7ivIH7r4cGBVPJBHJJdtsAy+8AMcfD5ddBkOHquO7EGVaLOqZmZU/MLP6QKN4IolIrmncGO6+G665Bh59NFyPoY7vwpJpsZgKPGJmh5hZb+Ah4G/xxRKRXGMGY8aEdTE+/DBc8a2O78KRabEYC0wHTgV+DbwAnBtXKBHJXf37w2uvQYMGYWjto48mnUiyIdOL8ta7+5/cfbC7/9zdb3P3dXGHE5HctPvuMGsWdO8eZq0dN04d3/ku0+ssis1sopnNMbNF5be4w4lI7tpmG5g+HUaMCNODHHssrFqVdCqJS6anoe4mzA+1FjgYuA+4P65QIlI3NG4M99wDV18NDz8cOr4//zzpVBKHTIvFFu7+AmDu/om7Xwr0ji+WiNQVZjB2LEyaBHPmhI7vt95KOpXUtkyLxepoevIFZjbazAYB28SYS0TqmAEDQsd3/fqw//4wcWLSiaQ2ZVosziTMC/UbYC9gOHB8XKFEpG7aY49wxXe3bnD00XD55er4zhdpi0V0Ad4Qd//a3cvc/cRoRNSMLOQTkTpm221Dx/cvfgEXX6yO73yRtlhEQ2T3Sr2CW0SkOk2awL33wlVXhY7vgw6CJUvSvkxyWKanod4GnjCzX5jZUeW3OIOJSN1mBuedB48/DrNnh47vt99OOpXUVKbFoiWwjDAC6sjo1i/di8ysr5nNM7OFZnZeFW2GRNdvzDazB1O2XxNtm2tmt+jIRqRuGjgQXn01FI/99w/FQ+qeBpk0cvcTN/WNo76O8UAfoAyYZWZT3H1OSpti4Hygp7svN7Ntou37AT2B3aOmrwIHAi9tag4RSV63bqHje+BA+PnP4Yor4He/CwVE6oaMioWZ3Q1sNKbB3X9Zzct6AAvdfVH0HhOAAcCclDajgPHRlOe4+xflbw00Icxsa0BD4N+ZZBWR3LTddvDSS3DSSXDhheGajDvvhC22SDqZZCKjYgE8lXK/CTAISHed
Zlvgs5THZcA+Fdp0BjCz14D6wKXu/jd3f93MXgSWEIrFre4+N8OsIpKjmjSB+++Hrl3hggvgo4/CLLbbbZd0Mkkn09NQj6U+NrOHgOfTvKyyA8yKRycNgGLgIKAd8Hcz2xVoDXSJtgFMM7Ne7v5KhRwnAycDbL/99um/iIgkziycgurSBYYPD2t8T5kSJiWU3JVpB3dFxUC6v85lQPuUx+3Y+GikDHjC3de4+2JgXvTeg4AZ0bUdXwPPAvtW/AB3v93dS9y9pKioqIZfRUSSMGhQ6PiG0PE9aVKyeaR6mc46u9LM/lt+A54krHFRnVlAsZl1NLNGwFBgSoU2kwkTE2JmrQmnpRYBnwIHmlkDM2tI6NzWaSiRPNO9e5jqfLfd4Kij4MordcV3rsr0NFSzTX1jd19rZqMJq+zVB/7i7rPNbBxQ6u5ToucONbM5wDpgjLsvM7OJhGG67xNOXf3N3Z/c1AwikvvKO75Hjgz9GOUd302aJJ1MUplnUMajiQOnu/tX0ePmwEHuPjnmfBkrKSnxUq3xKFJnuYcjiwsvhD594OmnoWHDpFPlPzN7091L0rXLtM/ikvJCAeDuK4BLahpORKQis3BkcdddMG0anH66TknlkkyHzlZWVDJ9rYhIxn75S1iwICyo1KULnHFG0okEMj+yKDWzG8zsx2bWycxuBN6MM5iIFK7f/z6Mlvrtb8PpKElepsXidOB74GHgEWAV8Ou4QolIYatXL1y8160bDB0K77+fdCLJdDTUN0ClEwGKiMThRz8KF+v16AH9+oW5pbbdNulUhSvT6yymRSOgyh+3MLOp8cUSEYG2beHJJ+HLL8MkhFpEKTmZnoZqHY2AAiCa+E9rcItI7PbcE/76V5gxI3R+a4RUMjItFuvN7P+n9zCzDlQyC62ISBwGDQqr7k2YAOPGJZ2mMGU6/PUC4FUzezl63ItoAj8RkWwYOxY+/BAuvRQ6d4Zhw5JOVFgyOrJw978BJYSJ/h4GziaMiBIRyQozuO02OOAAOPHEcFpKsifTDu6TgBcIReJs4H7g0vhiiYhsrHHjsCxr27YwYAB88knSiQpHpn0WZwB7A5+4+8FAd2BpbKlERKrQujU89RR8910YUrtyZdKJCkOmxWK1u68GMLPG7v4hsFN8sUREqtalCzz6KMydG/ou1q1LOlH+y7RYlEXXWUwmrFr3BOmXVRURiU2fPnDrrWE6kDFjkk6T/zK9gntQdPfSaG3srYG/xZZKRCQDp5wSRkjdeCPstBP86ldJJ8pfmzxzrLu/nL6ViEh2XH99mKX217+GH/8YfvrTpBPlp5quwS0ikhPq14eHHgr9GIMHhyMNqX0qFiJS5221VZhDqnHjMEJq2bKkE+UfFQsRyQsdOsDkyVBWBkcdBd9/n3Si/KJiISJ54yc/gbvvhldeCZ3fmnSw9mhpVBHJK8OGwbx5cNllsPPOcO65SSfKDyoWIpJ3LrkkdHSfdx4UF4dZa2Xz6DSUiOQds3A6au+9YfhweOutpBPVfSoWIpKXttgCnngCWrWCI4+Ef/4z6UR1m4qFiOSt7bYLkw7+97/Qvz98803SieouFQsRyWu77x4u2nv7bRgxAtavTzpR3aRiISJ5r1+/MC3I44/DRRclnaZu0mgoESkIZ54ZRkhdeWWYdHDEiKQT1S06shCRgmAWpjTv3RtOOgn+/vekE9UtKhYiUjAaNoSJE6Fjx3DtxUcfJZ2o7lCxEJGC0qJFGCHlHobUrliRdKK6IdZiYWZ9zWyemS00s/OqaDPEzOaY2WwzezBl+/Zm9pyZzY2e7xBnVhEpHMXFobN74UIYMgTWrk06Ue6LrViYWX1gPHA40BUYZmZdK7QpBs4Herr7LsCZKU/fB1zr7l2AHsAXcWUVkcJz4IFw220wbRr85jeadDCdOEdD9QAWuvsiADObAAwA5qS0GQWMd/flAO7+RdS2K9DA3adF27+OMaeIFKgTTwwjpK65JiyedPrpSSfKXXGe
hmoLfJbyuCzalqoz0NnMXjOzGWbWN2X7CjN73MzeNrNroyMVEZFaddVVMGBAGFr77LNJp8ldcRYLq2RbxQO9BkAxcBAwDLjTzJpH2w8AzgH2BjoBJ2z0AWYnm1mpmZUuXbq09pKLSMGoVw/++tdwpfcxx8AHHySdKDfFWSzKgPYpj9sBn1fS5gl3X+Pui4F5hOJRBrzt7ovcfS0wGdiz4ge4++3uXuLuJUVFRbF8CRHJf02bhmVZmzYNV3t/oR7SjcRZLGYBxWbW0cwaAUOBKRXaTAYOBjCz1oTTT4ui17Yws/IK0JsN+zpERGpVu3YwZUooFAMHwurVSSfKLbEVi+iIYDQwFZgLPOLus81snJn1j5pNBZaZ2RzgRWCMuy9z93WEU1AvmNn7hFNad8SVVUQEoKQE7rsPXn8dRo7UCKlU5nmyN0pKSry0tDTpGCKSB668Ei64AMaNy/+JB83sTXcvSddOEwmKiFRw/vlhSO3FF4dJB4cMSTpR8jTdh4hIBWZwxx3QsyccfzzMnJl0ouSpWIiIVKJxY5g0Cdq0Cavsffpp0omSpWIhIlKFoqIw6eCqVWHSwZUrk06UHBULEZFqdO0KjzwCs2fDccfBunVJJ0qGioWISBqHHQa33BIu3Bs7Nuk0ydBoKBGRDJx2Whghdf31YYTUqFFJJ8ouHVmIiGTohhugb99QOKZPTzpNdqlYiIhkqEEDmDABOneGn/8c5s9POlH2qFiIiGyCrbcOI6QaNICf/QyWLUs6UXaoWIiIbKKOHWHy5HDtxeDB8P33SSeKn4qFiEgN9OwJd90FL70Ep56a/5MOajSUiEgNDR8O8+bBFVeEZVnPOSfpRPFRsRAR2QyXXRYKxrnnho7v/v3Tv6Yu0mkoEZHNUK8e3HNPWAvj2GPhnXeSThQPFQsRkc205ZbwxBPQokWYQ2rJkqQT1T4VCxGRWtCmTZgOZPlyGDAAvv026US1S8VCRKSWdOsGDz4IpaVhHYz165NOVHtULEREalH//nDttTBxIlxySdJpao9GQ4mI1LLf/jZMOnjFFWHSweHDk060+XRkISJSy8xg/Hg4+GAYORJeey3pRJtPxUJEJAaNGoVTUTvsAAMHwqJFSSfaPCoWIiIxadkyTDq4bl0YUvvVV0knqjkVCxGRGHXuHI4w5s+HY46BtWuTTlQzKhYiIjHr3Rv+9CeYOhXOOivpNDWj0VAiIllw0kkbLss6enTSiTaNioWISJb84Q/hdNQZZ0BxMRx2WNKJMqfTUCIiWVK/frjCe7fdYMgQmDMn6USZU7EQEcmipk3DHFJbbgn9+sHSpUknyoyKhYhIlrVvH2apXbIEBg2C775LOlF6KhYiIgno0QPuvTdc3T1qVO4vy6oObhGRhAwZEjq8L7oIdt4Zfve7pBNVLdYjCzPra2bzzGyhmZ1XRZshZjbHzGab2YMVntvKzP5pZrfGmVNEJCkXXADHHRf+O3Fi0mmqFtuRhZnVB8YDfYAyYJaZTXH3OSltioHzgZ7uvtzMtqnwNpcDL8eVUUQkaWZw552weDGMGBHmktp776RTbSzOI4sewEJ3X+Tu3wMTgAEV2owCxrv7cgB3/6L8CTPbC9gWeC7GjCIiiWvSBCZNgm23DethfPZZ0ok2FmexaAukfuWyaFuqzkBnM3vNzGaYWV8AM6sHXA+Mqe4DzOxkMys1s9KldWX8mYhIJbbZJkw6+M03oWB8/XXSiTYUZ7GwSrZV7O9vABQDBwHDgDvNrDlwGvCMu1dbX939dncvcfeSoqKiWogsIpKcXXaBhx+G994L/Rjr1iWd6AdxFosyoH3K43bA55W0ecLd17j7YmAeoXj8BBhtZh8D1wEjzOzqGLOKiOSEww+Hm26CKVPg/POTTvODOIfOzgKKzawj8E9gKHBshTaTCUcU95hZa8JpqUXuflx5AzM7AShx90pHU4mI5JvRo8Okg9deGyYdHDky6UQxHlm4+1pgNDAVmAs84u6zzWyc
mfWPmk0FlpnZHOBFYIy7L4srk4hIXWAGN98Mhx4Kp5wCL72UdCIwz/XLBjNUUlLipaWlSccQEak1K1bAfvvBv/4Fb7wRZqqtbWb2pruXpGun6T5ERHJU8+ZhhFS9emHSweXLk8uiYiEiksM6dYLJk+Hjj2HwYFizJpkcKhYiIjlu//3hjjtg+vTQ+Z1E74EmEhQRqQNGjIB58+DKK8Okg9ley1vFQkSkjrj88lAwzj4bdtwRjjwye5+t01AiInVEvXpw332w554wbBi8+24WPzt7HyUiIptryy3D1d3Nm4cji3/9Kzufq2IhIlLH/M//hHW8ly2DAQNg1ar4P1PFQkSkDureHR54AGbNghNOgPXr4/08dXCLiNRRAwfCH/6QnenMVSxEROqwMdWu+lN7dBpKRETSUrEQEZG0VCxERCQtFQsREUlLxUJERNJSsRARkbRULEREJC0VCxERSStv1uA2s6XAJ5vxFq2BL2spTm1Srk2jXJtGuTZNPubawd2L0jXKm2KxucysNJNFy7NNuTaNcm0a5do0hZxLp6FERCQtFQsREUlLxeIHtycdoArKtWmUa9Mo16Yp2FzqsxARkbR0ZCEiImkVbLEws6PNbLaZrTezKkcRmFlfM5tnZgvN7Lws5GppZtPMbEH03xZVtFtnZu9Etykx5qn2+5tZYzN7OHr+DTPrEFeWTch0gpktTdk/J8WdKfrcv5jZF2b2QRXPm5ndEuV+z8z2zJFcB5nZVyn76+Is5WpvZi+a2dzod/GMStpkfZ9lmCvr+8zMmpjZTDN7N8p1WSVt4vt9dPeCvAFdgJ2Al4CSKtrUBz4COgGNgHeBrjHnugY4L7p/HvCHKtp9nYV9lPb7A6cBf47uDwUezoFMJwC3JvAz1QvYE/igiuePAJ4FDNgXeCNHch0EPJXA/moD7BndbwbMr+T/Zdb3WYa5sr7Pon3QNLrfEHgD2LdCm9h+Hwv2yMLd57r7vDTNegAL3X2Ru38PTAAGxBxtAHBvdP9eYGDMn1edTL5/at6JwCFmZglnSoS7vwL8p5omA4D7PJgBNDezNjmQKxHuvsTd34rurwTmAm0rNMv6PsswV9ZF+6B8AdWG0a1ip3Nsv48FWywy1Bb4LOVxGfH/0Gxq2l1rAAAExklEQVTr7ksg/NAC21TRromZlZrZDDOLq6Bk8v3/v427rwW+AlrFlCfTTAA/j05bTDSz9jHm2RRJ/Dxl6ifR6Y1nzWyXbH94dLqkO+Ffy6kS3WfV5IIE9pmZ1Tezd4AvgGnuXuX+qu3fx7xeg9vMnge2q+SpC9z9iUzeopJtmz18rLpcm/A227v752bWCZhuZu+7+0ebm62CTL5/LPuoGpl83pPAQ+7+nZmdQviXVu8YM2Uq2/sqU28Rpnz42syOACYDxdn6cDNrCjwGnOnu/634dCUvyco+S5MrkX3m7uuAbmbWHJhkZru6e2pfVGz7K6+Lhbv/dDPfogxI/VdpO+DzzXzPanOZ2b/NrI27L4kOt7+o4j0+j/67yMxeIvzrp7aLRSbfv7xNmZk1ALYm3lMeaTO5+7KUh3cAf4gxz6aI5edpc6X+IXT3Z8zsj2bW2t1jnwPJzBoS/iA/4O6PV9IkkX2WLleS+yz6zBXR731fILVYxPb7qNNQ1ZsFFJtZRzNrROgwim3kUWQKcHx0/3hgoyMgM2thZo2j+62BnsCcGLJk8v1T8w4GpnvUuxaTtJkqnNPuTzjnnAumACOiET77Al+Vn3JMkpltV35e28x6EP4uLKv+VbXyuQbcBcx19xuqaJb1fZZJriT2mZkVRUcUmNkWwE+BDys0i+/3MZu9+bl0AwYRqvB3wL+BqdH2/wGeSWl3BGE0xEeE01dx52oFvAAsiP7bMtpeAtwZ3d8PeJ8wEuh9YGSMeTb6/sA4oH90vwnwKLAQmAl0ysI+SpfpKmB2tH9eBHbO0s/UQ8ASYE30szUSOAU4JXregPFR7vepYhReArlGp+yvGcB+
Wcq1P+EUyXvAO9HtiKT3WYa5sr7PgN2Bt6NcHwAXR9uz8vuoK7hFRCQtnYYSEZG0VCxERCQtFQsREUlLxUJERNJSsRARkbRULEQ2gZl9nb5Vta+fGF11j5k1NbPbzOyjaBbRV8xsHzNrFN3P64tmpW5RsRDJkmj+oPruvijadCfh6tpid9+FMFtuaw8TJL4AHJNIUJFKqFiI1EB0RfG1ZvaBmb1vZsdE2+tFUz/MNrOnzOwZMxscvew4oivyzezHwD7Ahe6+HsLULe7+dNR2ctReJCfoMFekZo4CugF7AK2BWWb2CmHqlQ7AboQZg+cCf4le05NwNTXALsA7HiaGq8wHwN6xJBepAR1ZiNTM/oSZbde5+7+Blwl/3PcHHnX39e7+L8J0I+XaAEszefOoiHxvZs1qObdIjahYiNRMVQvKVLfQzCrC3D0Q5hXaw8yq+x1sDKyuQTaRWqdiIVIzrwDHRIvRFBGWLp0JvEpYeKmemW1LWH6z3FxgRwAPa4+UApelzF5abGYDovutgKXuviZbX0ikOioWIjUziTD757vAdODc6LTTY4SZXT8AbiOssPZV9Jqn2bB4nERYBGuhmb1PWHujfK2Gg4Fn4v0KIpnTrLMitczMmnpYQa0V4Wijp7v/K1qD4MXocVUd2+Xv8ThwvqdfJ14kKzQaSqT2PRUtUtMIuDw64sDdV5nZJYR1kj+t6sXRok6TVSgkl+jIQkRE0lKfhYiIpKViISIiaalYiIhIWioWIiKSloqFiIikpWIhIiJp/R+HYBPBWEmEBwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Hyper-parameter search.\n",
    "# SVMs are slow to train, so only one parameter is tuned at a time (here C; the penalty stays 'l2').\n",
    "C_s = np.logspace(-1, 3, 5)  # 5 values from 10**-1 to 10**3, evenly spaced on a log scale\n",
    "\n",
    "# Validation accuracy for every candidate C, in the same order as C_s.\n",
    "accuracy_s = [\n",
    "    fit_grid_point_Linear(oneC, X_train_part, y_train_part, X_val, y_val)\n",
    "    for oneC in C_s\n",
    "]\n",
    "\n",
    "x_axis = np.log10(C_s)\n",
    "# The label gives plt.legend() an entry to show; without any labelled artist matplotlib\n",
    "# warns that no handles with labels were found and draws an empty legend.\n",
    "plt.plot(x_axis, np.array(accuracy_s), 'b-', label='validation accuracy')\n",
    "\n",
    "plt.legend()\n",
    "plt.xlabel('log(C)')\n",
    "plt.ylabel('accuracy')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n"
     ]
    }
   ],
   "source": [
    "# Position of the highest validation accuracy (np.argmax returns the first one on ties),\n",
    "# then the candidate C at that position.\n",
    "index = np.argmax(accuracy_s)\n",
    "Best_C = C_s[index]\n",
    "\n",
    "print(Best_C)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "校验集上的正确率在 C=1 时最高（约 0.731）；C 继续增大时，校验集正确率逐渐下降（即校验误差逐渐增大）。注意这里度量的是校验集正确率，而不是训练误差。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
